* Author: Anina Schwarzenbach
* generated October 2014
* reviewed November 2015
* last review August 2020

* Prepare the cleaned work file.
* Loads the raw work file, removes the occupational-activity items, stores the
* result as a separate "clean" file and keeps that file in memory for the steps below.
clear
set more off

* load ADOS: register the project's ado directory (set once; it does not depend on the data in memory)
sysdir set PERSONAL "../../../ados"

use "../../STATAfiles/S11_workfile_20150327.dta"

* drop var occupational activity
drop f8m1 f8m2_1 f8m2_2 f8m2r
drop f8v1 f8v2_1 f8v2_2 f8v2r

* compress BEFORE saving: when it ran after saveold and was followed by clear,
* it changed neither the file on disk nor the data used afterwards
compress
saveold "../../STATAfiles/S11_workfile_20150327clean.dta", replace

* continue with the file exactly as it was written to disk
clear
use "../../STATAfiles/S11_workfile_20150327clean.dta"


*******************************************************************************************
*cleaning of migration variables + building of ethnicity variables GERMANY
******************************************************************************************

***** STEP 1 : GENERATE 1 VAR KID 2 VAR PARENTS 4 VAR GRANDPARENTS *******

* Inputs for the ethnic-background variables (a3 = kid, a4 = parents, a5/a6 = grandparents):
* the numeric *_1r items and the country-group strings *_2k.
local flagvars  a3_1r a4_1_1r a4_2_1r a5_1_1r a5_2_1r a6_1_1r a6_2_1r
local landvars  a3_2k a4_1_2k a4_2_2k a5_1_2k a5_2_2k a6_1_2k a6_2_2k
local ancestors a4_1_2k a4_2_2k a5_1_2k a5_2_2k a6_1_2k a6_2_2k

* frequency tables of all raw inputs
foreach x of varlist `flagvars' `landvars' {
	fre `x'
}

* Harmonise the strings of parents and grandparents:
* "WEISS NICHT" and "0" become "99"; "ASIEN" is merged into "OSTASIEN".
* NOTE(review): the kid variable a3_2k is not cleaned here - confirm it holds none of these values.
foreach x of varlist `ancestors' {
	replace `x' = "99" if inlist(`x', "WEISS NICHT", "0")
	replace `x' = "OSTASIEN" if `x' == "ASIEN"
}

* transform the string variables into numeric variables (suffix n)
foreach x of varlist `landvars' {
	encode `x', generate(`x'n)
}

* Parents and grandparents only: shift every code down by one and send the resulting 0 to 99.
* NOTE(review): presumably "99" sorts first under encode and therefore received code 1 - verify
* in the fre output. The fixed label below also presumes that every variable contains all
* twelve country groups; a variable lacking one would be numbered differently.
foreach x of varlist `ancestors' {
	replace `x'n = `x'n - 1
	recode `x'n (0 = 99)
	fre `x'n
}

* relabel: one common value label for all seven numeric variables (defined once)
label define migrationknlb ///
	1 "AFRIKA/SUBSAHARA" 2 "EX-JUG./ALBANIEN" 3 "EX-SOWJETUNION" 4 "LATEINAMERIKA" ///
	5 "OSTASIEN " 6 "OSTEUROPA" 7 "POLEN" 8 "SUEDEUROPA" 9 "TUERKEI" ///
	10 "USA/KAN/AUS" 11 "V.ASIEN/MAGHREB" 12 "WESTEUROPA" 99 "99", replace
foreach x of varlist `landvars' {
	label values `x'n migrationknlb
	fre `x'n
}

* Detailed migration variables a3_1rn and ax_x_1rn. Target codes:
*	0 DEUTSCHLAND, 1-12 country group (same codes as the *_2kn variables),
*	99 missing, -11 LAND FEHLT (indicator is 1 but no country code is available)

* start from a copy of each *_1r item
foreach src of varlist a3_1r a4_1_1r a4_2_1r a5_1_1r a5_2_1r a6_1_1r a6_2_1r {
	generate `src'n = `src'
}

* How the three replace lines work for every person:
*   1. take the country code wherever the indicator is not 0;
*      missing counts as larger than any number in Stata, so ">= 1" also holds for a
*      missing country code and such rows are set to missing at this point
*   2. -11 when the indicator is 1 and the country code is missing
*   3. 99 when indicator and country code are both missing

* kid
fre a3_1r a3_2kn

replace a3_1rn = a3_2kn if a3_1r != 0 & a3_2kn >= 1
replace a3_1rn = -11 if a3_1r == 1 & a3_2kn == .
replace a3_1rn = 99 if a3_1r == . & a3_2kn == .
fre a3_1rn

* parents: inputs first, then the same three rules per parent
foreach p in a4_1 a4_2 {
	fre `p'_1r `p'_2kn
}
foreach p in a4_1 a4_2 {
	replace `p'_1rn = `p'_2kn if `p'_1r != 0 & `p'_2kn >= 1
	replace `p'_1rn = -11 if `p'_1r == 1 & `p'_2kn == .
	replace `p'_1rn = 99 if `p'_1r == . & `p'_2kn == .
	fre `p'_1rn
}

* grandparents: as above, but an indicator value of 9 is treated like a missing indicator
* NOTE(review): kid and parents have no "== 9" clause - confirm that code 9 cannot occur there.
foreach g in a5_1 a5_2 a6_1 a6_2 {
	fre `g'_1r `g'_2kn
}

fre a5_1_1rn

foreach g in a5_1 a5_2 a6_1 a6_2 {
	replace `g'_1rn = `g'_2kn if `g'_1r != 0 & `g'_2kn >= 1
	replace `g'_1rn = -11 if `g'_1r == 1 & `g'_2kn == .
	replace `g'_1rn = 99 if (`g'_1r == . | `g'_1r == 9) & `g'_2kn == .
	fre `g'_1rn
}

* label detailed migration variables (label defined once, attached to all seven)
label define migrationrnlb ///
	0 "DEUTSCHLAND" 1 "AFRIKA/SUBSAHARA" 2 "EX-JUG./ALBANIEN" 3 "EX-SOWJETUNION" 4 "LATEINAMERIKA" ///
	5 "OSTASIEN " 6 "OSTEUROPA" 7 "POLEN" 8 "SUEDEUROPA" 9 "TUERKEI" 10 "USA/KAN/AUS" ///
	11 "V.ASIEN/MAGHREB" 12 "WESTEUROPA" 99 "MISSING" -11 "LAND FEHLT", replace
local detailed a3_1rn a4_1_1rn a4_2_1rn a5_1_1rn a5_2_1rn a6_1_1rn a6_2_1rn
foreach out of varlist `detailed' {
	label values `out' migrationrnlb
	*fre `out'
}

* show the full family record of every case with a missing country (-11), one listing per variable
local record a3_1r a3_1rn a4_1_1r a4_1_1rn a4_2_1r a4_2_1rn a5_1_1r a5_1_1rn a5_2_1r a5_2_1rn a6_1_1r a6_1_1rn a6_2_1r a6_2_1rn
foreach out of varlist `detailed' {
	list `record' if `out' == -11
}



********* ********* ********* ********* ********* ********* ********* ******** ********** ********* ********* ********* ********* ********* 

**** STEP 2: GENERATE VARIABLES WITH 14 OCCURRENCES*****
	**1 "Deutsch" 
	**2 "Afrika/Subsahara"
	**3 "Ex-Jugo./Albanien"
	**4 "Ex. Sowietunion"
	**5 "Lateinamerika"
	**6 "Ostasien"
	**7 "Osteuropa"
	**8 "Polen"
	**9 "Suedeuropa"
	**10 "Tuerkey"
	**11 "US/Kananda/Australien"
	**12 "Vorderasien/Maghreb"
	**13 "Westeuropa"
	**14 "Restkategorie (Land fehlt)"
	**99 "Missing (weiss nicht, keine Angaben)"

* Shift the detailed codes into the 1-14 scheme listed above:
* 0..12 move up by one, -11 (country missing) becomes 14; 99 is not touched by any rule.
* The value label is the same for all seven variables, so it is defined once.
label define migrationrnalb ///
	1 "Deutsch" 2 "Afrika/Subsahara" 3 "Ex-Jugo./Albanien" 4 "Ex. Sowietunion" 5 "Lateinamerika" ///
	6 "Ostasien" 7 "Osteuropa" 8 "Polen" 9 "Suedeuropa" 10 "Tuerkey" 11 "US/Kananda/Australien" ///
	12 "Vorderasien/Maghreb" 13 "Westeuropa" 14 "Restkategorie" ///
	99 "Missing, Weiss nicht, Land fehlt", replace

foreach src of varlist a3_1rn a4_1_1rn a4_2_1rn a5_1_1rn a5_2_1rn a6_1_1rn a6_2_1rn {
	recode `src' (-11 = 14) (0 = 1) (1 = 2) (2 = 3) (3 = 4) (4 = 5) (5 = 6) (6 = 7) ///
		(7 = 8) (8 = 9) (9 = 10) (10 = 11) (11 = 12) (12 = 13), generate(`src'a)
	label values `src'a migrationrnalb
	fre `src'a
}

* Count, per person and per code 1-14, how many family members carry that code:
*	ethnkidDE_x  kid (0-1), ethnparDE_x  parents (0-2), ethngpDE_x  grandparents (0-4)
* The *_miss variables count code 99 in the same way.
local kidvar  a3_1rna
local parvars a4_1_1rna a4_2_1rna
local gpvars  a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna

forvalues code = 1/14 {
	egen ethnkidDE_`code' = anycount(`kidvar'), values(`code')
	egen ethnparDE_`code' = anycount(`parvars'), values(`code')
	egen ethngpDE_`code' = anycount(`gpvars'), values(`code')
}
egen ethnkidDE_miss = anycount(`kidvar'), values(99)
egen ethnparDE_miss = anycount(`parvars'), values(99)
egen ethngpDE_miss = anycount(`gpvars'), values(99)

* variable labels: kid
label variable ethnkidDE_miss "DE kid missing"
label variable ethnkidDE_1 "DE kid Germany"
label variable ethnkidDE_2 "DE kid Afrika/Subsahara"
label variable ethnkidDE_3 "DE kid Ex-Jugo./Albanien"
label variable ethnkidDE_4 "DE kid Ex. Sowietunion"
label variable ethnkidDE_5 "DE kid Lateinamerika"
label variable ethnkidDE_6 "DE kid Ostasien"
label variable ethnkidDE_7 "DE kid Osteuropa"
label variable ethnkidDE_8 "DE kid Polen"
label variable ethnkidDE_9 "DE kid Suedeuropa"
label variable ethnkidDE_10 "DE kid Tuerkey"
label variable ethnkidDE_11 "DE kid US/Kananda/Australien"
label variable ethnkidDE_12 "DE kid Vorderasien/Maghreb"
label variable ethnkidDE_13 "DE kid Westeuropa"
label variable ethnkidDE_14 "DE kid Restkategorie"

* variable labels: parents
label variable ethnparDE_miss "DE parents missing"
label variable ethnparDE_1 "DE parents Germany"
label variable ethnparDE_2 "DE parents Afrika/Subsahara"
label variable ethnparDE_3 "DE parents Ex-Jugo./Albanien"
label variable ethnparDE_4 "DE parents Ex. Sowietunion"
label variable ethnparDE_5 "DE parents Lateinamerika"
label variable ethnparDE_6 "DE parents Ostasien"
label variable ethnparDE_7 "DE parents Osteuropa"
label variable ethnparDE_8 "DE parents Polen"
label variable ethnparDE_9 "DE parents Suedeuropa"
label variable ethnparDE_10 "DE parents Tuerkey"
label variable ethnparDE_11 "DE parents US/Kananda/Australien"
label variable ethnparDE_12 "DE parents Vorderasien/Maghreb"
label variable ethnparDE_13 "DE parents Westeuropa "
label variable ethnparDE_14 "DE parents Restkategorie"

* variable labels: grandparents
label variable ethngpDE_miss "DE grandparents missing"
label variable ethngpDE_1 "DE grandparents Germany"
label variable ethngpDE_2 "DE grandparents Afrika/Subsahara"
label variable ethngpDE_3 "DE grandparents Ex-Jugo./Albanien"
label variable ethngpDE_4 "DE grandparents Ex. Sowietunion"
label variable ethngpDE_5 "DE grandparents Lateinamerika"
label variable ethngpDE_6 "DE grandparents Ostasien"
label variable ethngpDE_7 "DE grandparents Osteuropa"
label variable ethngpDE_8 "DE grandparents Polen"
label variable ethngpDE_9 "DE grandparents Suedeuropa"
label variable ethngpDE_10 "DE grandparents Tuerkey"
label variable ethngpDE_11 "DE grandparents US/Kananda/Australien"
label variable ethngpDE_12 "DE grandparents Vorderasien/Maghreb"
label variable ethngpDE_13 "DE grandparents Westeuropa"
label variable ethngpDE_14 "DE grandparents Restkategorie"

* frequency table of every count variable
foreach cnt of varlist ethnkidDE_* ethnparDE_* ethngpDE_* {
	fre `cnt'
}

********* ********* ********* ********* ********* ********* ********* ******** ********** ********* ********* ********* ********* ********* 
	
**** STEP 3: GENERATE ETHNICITY VARIABLE*****
* Builds ethn0DE from the step 2 counts: ethnparDE_x = number of parents (0-2) and
* ethngpDE_x = number of grandparents (0-4) with code x; *_miss counts code 99.
* Every rule only fills cases that are still missing, so earlier rules take precedence.
*
* Working codes while the rules run (17 categories plus missing):
	**1 "Deutsch" 
	**2 "Afrika/Subsahara"
	**3 "Ex-Jugo./Albanien"
	**4 "Ex. Sowietunion"
	**5 "Lateinamerika"
	**6 "Ostasien"
	**7 "Osteuropa"
	**8 "Polen"
	**9 "Suedeuropa"
	**10 "Tuerkey"
	**11 "US/Kananda/Australien"
	**12 "Vorderasien/Maghreb"
	**13 "Westeuropa"
	**14 "Restkategorie"
	
	**15 "gemischter nicht DE MH"

	**16 "gemischt Deutsch/Tuerkisch"
	**17 "gemischt Deutsch/anderer MH"
		
	**99 "Missing (weiss nicht, keine Angaben)"
* At the end of this step all codes 1-17 are shifted down by one (1->0 ... 17->16).
	
capture drop ethn0DE
gen ethn0DE=.

* RULE 1: if two par have mb kid has mb as well (do not consider information of gp)
forvalues i=2/14 {
	replace ethn0DE=`i' if ethnparDE_`i'==2
}
*fre ethn0DE

* RULE 2: on top of that the ethnicity is defined by having at least three gp of the same background 
forvalues i=1/14 {
	replace ethn0DE=`i' if ethn0DE==. & ethngpDE_`i'>=3
}

* >= 3 gp of mixed foreign background (no three of them share one code, otherwise RULE 2 applied)
replace ethn0DE=15 if ethn0DE==. & ethngpDE_1 ==1 & ethngpDE_miss==0
replace ethn0DE=15 if ethn0DE==. & ethngpDE_1 ==0 & ethngpDE_miss<=1
*fre ethn0DE

* RULE 3: mixed German/foreign ethnicity if two gp (or one parent) are native and two gp (or one parent) are foreign 
* German/Turkish
replace ethn0DE=16 if  ethn0DE==. & ethngpDE_1==2 & ethngpDE_10==2
replace ethn0DE=16 if  ethn0DE==. & (ethnparDE_1==1 & ethngpDE_1<=2) & ethngpDE_10==2
replace ethn0DE=16 if  ethn0DE==. & ethngpDE_1==2 & (ethnparDE_10==1 & ethngpDE_10<=2)
replace ethn0DE=16 if  ethn0DE==. & ethnparDE_1==1 & ethnparDE_10==1

replace ethn0DE=16 if  ethn0DE==. & ethnparDE_1==2 & ethngpDE_10==2 & ethngpDE_miss==2
replace ethn0DE=16 if  ethn0DE==. & ethngpDE_1==1 &  ethngpDE_10==2 & ethngpDE_miss==1 

* German/other migration background
* (code 10 is included in the loop, but German/Turkish cases were already assigned 16 above)
forvalues i=2/14 {
	replace ethn0DE=17 if  ethn0DE==. & ethngpDE_1==2 & ethngpDE_`i'==2
	replace ethn0DE=17 if  ethn0DE==. & (ethnparDE_1==1 & ethngpDE_1<=2) & ethngpDE_`i'==2
	replace ethn0DE=17 if  ethn0DE==. & ethngpDE_1==2 & (ethnparDE_`i'==1 & ethngpDE_`i'<=2)
	replace ethn0DE=17 if  ethn0DE==. & ethnparDE_1==1 & ethnparDE_`i'==1

	replace ethn0DE=17 if  ethn0DE==. & ethnparDE_1==2 & ethngpDE_`i'==2 & ethngpDE_miss==2
	replace ethn0DE=17 if  ethn0DE==. & ethngpDE_1==1 & ethngpDE_`i'==2 & ethngpDE_miss==1 
}
* These two conditions do not depend on the country code, so they run once instead of
* thirteen times. All German/other rules assign the same value 17, hence their order is irrelevant.
replace ethn0DE=17 if  ethn0DE==. & ethngpDE_1==2 & ethngpDE_miss==0
replace ethn0DE=17 if  ethn0DE==. & ethngpDE_1==1 & ethngpDE_miss==1
*fre ethn0DE

* RULE 4a: native if information of gp is lacking and one parent is native and the other is native or unknown
replace ethn0DE=1 if ethn0DE==. & ethnparDE_1==2  
replace ethn0DE=1 if ethn0DE==. & ethnparDE_1==1 & ethnparDE_miss==1 
*replace ethn0DE=1 if ethn0DE==. & ethngpDE_1==2 & ethnparDE_miss==2 & ethngpDE_miss==2 

* RULE 4b: foreign if information of gp is lacking and one parent is foreign and the other is unknown
forvalues i=2/14 {
	replace ethn0DE=`i' if  ethn0DE==. & ethnparDE_`i'==1 & ethnparDE_miss==1 
	*replace ethn0DE=`i' if  ethn0DE==. & ethngpDE_`i'==2 & ethnparDE_miss==2 & ethngpDE_miss==2
}

* parents are of mixed foreign background
* FIX: this assigned working code 14, which is "Restkategorie" at this point. The mixed
* non-German category is working code 15; it becomes 14 only after the shift below, and
* step 4 treats that final code 14 as "mixed non German migration background".
replace ethn0DE=15 if ethn0DE==. & ethnparDE_1==0 & ethnparDE_miss==0

* RULE 5: missing if parents are unknown and two or more grand parents are unknown
replace ethn0DE=99 if ethn0DE==. & ethnparDE_miss==2 & ethngpDE_miss >=2

* shift working codes 1-17 to the final codes 0-16 (99 is not touched) and label the variable
recode ethn0DE (1=0) (2=1) (3=2) (4=3) (5=4) (6=5) (7=6) (8=7) (9=8) (10=9) (11=10) (12=11) (13=12) (14=13) (15=14) (16=15) (17=16)
label var ethn0DE "ethnicity 17 cat"
* label 15 is written in ASCII ("Tuerkisch"), like "Tuerkey"/"Suedeuropa", because the umlaut was stored as an invalid character
label define ethn0DElb  0"Deutsch" 1"Afrika/Subsahara" 2"Ex-Jugo./Albanien" 3"Ex. Sowietunion" 4"Lateinamerika" 5"Ostasien" 6"Osteuropa" ///
	7"Polen" 8"Suedeuropa" 9"Tuerkey" 10"US/Kananda/Australien" 11"Vorderasien/Maghreb" 12"Westeuropa" ///
	13 "Restkategorie" 14 "gemischter nicht DE MH" 15 "gemischt Deutsch/Tuerkisch" 16 "gemischt Deutsch/anderer MH" ///
	99 "Weiss nicht, Missing" , replace
label val ethn0DE ethn0DElb
fre ethn0DE 

* inspect the mixed German/Turkish cases (final code 15)
list a3_1r a4_1_1rna a4_2_1rna a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna if  ethn0DE==15



********* ********* ********* ********* ********* ********* ********* ******** ********** ********* ********* ********* ********* ********* 
	
**** STEP 4: GENERATE SUMMARISED ETHNICITY VARIABLEs*****
* input variable
fre ethn0DE

***** ethn0DEd - DUMMY VARIABLE *****
* All migration codes 1-16 collapse into 1; the recode rule leaves 0 and 99 as they are.
label define ethn0DEdlb 0 "Germany" 1 "migration background" 99 "Missing", replace
recode ethn0DE (1/16 = 1), generate(ethn0DEd)
label variable ethn0DEd "ethnicity dummy"
label values ethn0DEd ethn0DEdlb
fre ethn0DEd
* cross-check against the existing variable a4d
tabulate ethn0DEd a4d

***** ethn1DE - 9 occurrences *****
capture drop ethn1DE ethnparDE_378913 ethngpDE_378913 ethnparDE_212 ethngpDE_212

* Regroup ethn0DE; code 14 (mixed non German migration background) has no rule here
* and therefore keeps the value 14 until it is resolved further below.
recode ethn0DE (0 = 0) (1 11 = 3) (2 6 7 8 12 = 1) (3 = 4) (4 10 13 = 6) (5 = 5) ///
	(9 = 2) (15 = 7) (16 = 8), generate(ethn1DE)

* For ethn0DE==14, count the parents / grandparents per region, using the 1-14 codes
* of the *_1rna variables.
local parvars a4_1_1rna a4_2_1rna
local gpvars  a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna
local family  `parvars' `gpvars'

* Europe: 3 Ex-Jugo./Albanien, 7 Osteuropa, 8 Polen, 9 Suedeuropa, 13 Westeuropa
egen ethnparDE_378913 = anycount(`parvars'), values(3 7 8 9 13)
egen ethngpDE_378913 = anycount(`gpvars'), values(3 7 8 9 13)
list `family' if ethnparDE_378913 == 2 & ethn0DE == 14
list `family' if ethngpDE_378913 == 3 & ethn0DE == 14

* Africa: 2 Afrika/Subsahara, 12 Vorderasien/Maghreb
egen ethnparDE_212 = anycount(`parvars'), values(2 12)
egen ethngpDE_212 = anycount(`gpvars'), values(2 12)
list `family' if ethnparDE_212 == 2 & ethn0DE == 14
list `family' if ethngpDE_212 == 3 & ethn0DE == 14

* Resolve the mixed cases; later lines overwrite earlier ones, so the order matters.
* RULE 1: both parents from the same region
replace ethn1DE = 1 if ethn0DE == 14 & ethnparDE_378913 == 2
replace ethn1DE = 3 if ethn0DE == 14 & ethnparDE_212 == 2

* RULE 2: at least three grandparents from the same region
replace ethn1DE = 1 if ethn0DE == 14 & ethngpDE_378913 >= 3
replace ethn1DE = 3 if ethn0DE == 14 & ethngpDE_212 >= 3

* whatever is still coded 14 goes over to rest of world
list `family' if ethn1DE == 14

recode ethn1DE (14 = 6)

label define ethn1DElb ///
	0 "Germany" 1 "Europe without Ex-Soviet" 2 "Turkey" 3 "Africa" 4 "Ex-Soviet" ///
	5 "Eastern Asia" 6 "Rest of world" 7 "mixed German/Turkish" 8 "mixed German/other" ///
	99 "Missing", replace
label values ethn1DE ethn1DElb
fre ethn1DE 

***** ethn2DE - 7 occurrences *****
* Ex-Soviet (4) joins Africa (3); Eastern Asia (5) and rest of world (6) are merged into 4;
* the two mixed groups move down to 5 and 6.
label define ethn2DElb ///
	0 "Germany" 1 "Europe without Ex-Soviet" 2 "Turkey" 3 "Africa and Ex-Soviet" ///
	4 "Asia and World" 5 "Mixed German/Turkish" 6 "Mixed German/other" 99 "Missing", replace
recode ethn1DE (4 = 3) (5 6 = 4) (7 = 5) (8 = 6), generate(ethn2DE)
label variable ethn2DE "ethnicity 7 cat"
label values ethn2DE ethn2DElb
fre ethn2DE 

***** ethn3DE - 5 occurrences *****
* Both mixed groups (5, 6) are counted as Germany (0).
label define ethn3DElb ///
	0 "Germany with mixed" 1 "Europe without Ex-Soviet" 2 "Turkey" 3 "Africa and Ex-Soviet" ///
	4 "Asia and World" 99 "Missing", replace
recode ethn2DE (5/6 = 0), generate(ethn3DE)
label variable ethn3DE "ethnicity 5 cat Germany+mixed"
label values ethn3DE ethn3DElb
fre ethn3DE

***** ethn4DEa - definition - 12 occurrences *****
* Regrouping of ethn0DE; codes 4 5 10 12 13 14 are pooled as "Other migration background".
label define ethn4DEalb ///
	0 "Germany" 1 "Turkey" 2 "Sud-EU" 3 "Ex-Soviet" 4 "Ex-Jugoslavia/Albania" 5 "Poland" ///
	6 "Other Eastern Europe" 7 "Maghreb/Muslim Asia" 8 "Subsahrian Africa" ///
	9 "Other migration background" 10 "mixed Germany/Turkey" ///
	11 "mixed Germany/other migration background" 99 "Missing", replace
recode ethn0DE (0 = 0) (1 = 8) (2 = 4) (3 = 3) (6 = 6) (7 = 5) (8 = 2) (9 = 1) (11 = 7) ///
	(4 5 10 12 13 14 = 9) (15 = 10) (16 = 11), generate(ethn4DEa)
label variable ethn4DEa "ethnicity new 12 cat"
label values ethn4DEa ethn4DEalb
fre ethn4DEa
*tab ethn4DEa a4f, nol
*list a4_1_1rna a4_2_1rna a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna if  ethn4DEa==11 & a4f==0

***** ethn4DEb - definition - 10 occurrences *****
* Ex-Jugoslavia/Albania (4) joins other Eastern Europe; Subsaharan Africa (8) joins "other".
label define ethn4DEblb ///
	0 "Germany" 1 "Turkey" 2 "Sud-EU" 3 "Ex-Soviet" 4 "Poland" 5 "Other Eastern Europe" ///
	6 "Maghreb/Muslim Asia" 7 "Other migration background" 8 "mixed Germany/Turkey" ///
	9 "mixed Germany/other migration background" 99 "Missing", replace
recode ethn4DEa (0 = 0) (1 = 1) (2 = 2) (3 = 3) (5 = 4) (4 6 = 5) (7 = 6) (8 9 = 7) ///
	(10 = 8) (11 = 9), generate(ethn4DEb)
label variable ethn4DEb "ethnicity new 10 cat"
label values ethn4DEb ethn4DEblb
fre ethn4DEb 

tabulate ethn4DEb a4_1_1rna

***** ethn4DEc - old definition - 5 occurrences, mixed kept separately *****
label define ethn4DEclb ///
	0 "Germany" 1 "Turkey" 2 "Ex-Soviet" 3 "other migrants" 4 "mixed German/migrants" ///
	99 "Missing", replace
recode ethn4DEa (0 = 0) (1 = 1) (3 = 2) (2 = 3) (4/9 = 3) (10/11 = 4), generate(ethn4DEc)
label variable ethn4DEc "ethnicity new 5 cat mixed seperately"
label values ethn4DEc ethn4DEclb


********* ********* ********* ********* ********* ********* ********* ******** ********** ********* ********* ********* ********* ********* 
**** STEP 5: GENERATE GENERATION VARIABLE*****
*1, 2 and 3 generation

* 1. GENERATION : kid born abroad, both parents born abroad

* 1 1/2 . GENERATION : kid immigrated < 5 years age, both parents born abroad

* 2. GENERATION : 
	*kid born in germany, both parents born abroad
	*kid born in germany, one parent born abroad, less than 2 gp native

* 3. GENERATION : 
	* kid born in germany, both parents born in germany
	* kid born in germany, one parent born abroad, two gp native 

capture drop generat*
fre a4generationA a4generationB

***** generat1DE - 9 occurrences *****
generate generat1DE = .

* no migration background / ethnicity unknown
replace generat1DE = 0 if ethn0DE == 0
replace generat1DE = 99 if ethn0DE == 99

* Shared conditions of all remaining rules. ">0" is also true for a missing ethn0DE,
* because Stata treats missing as larger than any number.
local mb     ethn0DE > 0 & ethn0DE != 99
local native `mb' & ethnkidDE_1 == 1

* kid not coded as German (ethnkidDE_1==0): split by a3_2_1 below 5 or not.
* "!(a3_2_1 < 5)" covers values of 5 and above as well as a missing a3_2_1.
replace generat1DE = 1 if `mb' & ethnkidDE_1 == 0 & !(a3_2_1 < 5)
replace generat1DE = 2 if `mb' & ethnkidDE_1 == 0 & a3_2_1 < 5

* kid coded as German: split by number of native parents and native grandparents
replace generat1DE = 3 if `native' & ethnparDE_1 == 0 & ethngpDE_1 < 2
replace generat1DE = 4 if `native' & ethnparDE_1 == 0 & ethngpDE_1 >= 2
replace generat1DE = 5 if `native' & ethnparDE_1 == 1 & ethngpDE_1 < 2
replace generat1DE = 6 if `native' & ethnparDE_1 == 1 & ethngpDE_1 == 2
replace generat1DE = 7 if `native' & ethnparDE_1 == 2 & ethngpDE_1 < 2
replace generat1DE = 8 if `native' & ethnparDE_1 == 2 & ethngpDE_1 == 2

fre generat1DE

label variable generat1DE "immigration generation 9 cat"
label define generat1DElb ///
	0 "no immigration" ///
	1 "kid foreign: immigrated >=5 yrs age or no information about age of immigration" ///
	2 "kid foreign: immigrated < 5 yrs age" ///
	3 "kid native: parents foreign, less than two gp native" ///
	4 "kid native: parents foreign, two or more gp native" ///
	5 "kid native: one parent native, less than two gp native" ///
	6 "kid native: one parent native, two gp native" ///
	7 "kid native: two parents native, less than two gp native" ///
	8 "kid native: two parents native, two gp native" ///
	99 "missing", replace
label values generat1DE generat1DElb
fre generat1DE

*check with old definition generation
*tab generat1DE a4generationA
*list a4_1_1rna a4_2_1rna a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna if generat1DE==7 & a4generationA==0
*list a4_1_1rna a4_2_1rna a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna if generat1DE==8 & a4generationA==0
*list a4_1_1rna a4_2_1rna a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna if generat1DE==7 & a4generationA==3


***** generat2DE - 6 occurrences *****
* Collapse generat1DE: 3/4 -> 2. generation (3), 7/8 -> 3. generation (4).
* Codes 5 and 6 (one native parent) have no recode rule and are assigned below.
recode generat1DE (0=0) (1=1) (2=2) (3=3) (4=3) (7=4) (8=4), gen(generat2DE)
label var generat2DE "immigration generation - short mixed separate"

* one native parent, less than two native gp: 2. generation unless the ethnicity is mixed.
* FIX: the condition used to read (ethn1DE!=7 | ethn1DE!=8), which is true for every value;
* it only gave the intended result because the next line overwrote the mixed cases.
replace generat2DE=3 if generat1DE==5 & !inlist(ethn1DE, 7, 8)

* mixed generation trunk: ethn1DE 7 = mixed German/Turkish, 8 = mixed German/other
replace generat2DE=5 if generat1DE==5 & inlist(ethn1DE, 7, 8)
replace generat2DE=5 if generat1DE==6

label define generat2DElb ///
0 "no immigration" ///
1 "1. generation, kid immigrated >=5 yrs age or no information about age of immigration" ///
2 "1. generation, kid immigrated < 5 yrs age" ///
3 "2. generation" ///
4 "3. generation" ///
5 "1 parent foreign + 2 gp foreign, 1 parent native + 2 gp native" ///
99 "missing", replace
label value generat2DE generat2DElb
fre generat2DE

***** generat3DE - 5 occurrences *****
* Same as generat2DE, except that the separate mixed group (5) is merged into code 4.
* NOTE(review): code 4 is labelled "3. generation", whereas the variable label speaks of
* "mixed 2. generation" - confirm which of the two is intended.
label define generat3DElb ///
	0 "no immigration" ///
	1 "1. generation, kid immigrated >=5 yrs age or no information about age of immigration" ///
	2 "1. generation, kid immigrated < 5 yrs age" ///
	3 "2. generation" ///
	4 "3. generation" ///
	99 "missing", replace
recode generat2DE (5 = 4), generate(generat3DE)
label variable generat3DE "immigration generation - short mixed 2. generation"
label values generat3DE generat3DElb

***** generat4DEa - 4 occurrences *****
* generat1DE 1-2 -> 1. generation, 3-6 -> 2. generation, 7-8 -> 3. generation
label define generat4DEalb ///
	0 "no immigration" ///
	1 "1. generation" ///
	2 "2. generation" ///
	3 "3. generation" ///
	99 "missing", replace
recode generat1DE (0 = 0) (1/2 = 1) (3/6 = 2) (7/8 = 3), generate(generat4DEa)
label variable generat4DEa "immigration generation new, kid age immigration not relevant, 2. generation including mixed"
label values generat4DEa generat4DEalb

* compare with the existing variable a4generationA and list the disagreeing cases
local family a4_1_1rna a4_2_1rna a5_1_1rna a5_2_1rna a6_1_1rna a6_2_1rna
tabulate generat4DEa a4generationA
list `family' if generat4DEa == 0 & a4generationA == 2
list `family' if generat4DEa == 3 & a4generationA == 0

***** generat4DEb - 4 occurrences *****
* as generat4DEa, but generat1DE==2 (kid immigrated < 5 yrs age) counts as 2. generation
label define generat4DEblb ///
	0 "no immigration" ///
	1 "1. generation, kid immigrated >=5 yrs age or no information about age of immigration" ///
	2 "2. generation" ///
	3 "3. generation" ///
	99 "missing", replace
recode generat1DE (0 = 0) (1 = 1) (2/6 = 2) (7/8 = 3), generate(generat4DEb)
label variable generat4DEb "immigration generation new, kid age immigration relevant, 2. generation including mixed"
label values generat4DEb generat4DEblb

tabulate generat4DEb a4generationB


* save data
save "../../STATAfiles/S11_workfile_20150327_mbv2.dta", replace



 
 










 
